This file
# Read a delimited file from the ../input directory and return it as a tibble.
#
# fileName: name of the file inside ../input (defaults to "train.csv").
# ...:      further arguments, forwarded unchanged to data.table::fread().
loadData <- function(fileName = "train.csv", ...) {
  input_path <- paste0("../input/", fileName)
  raw_table <- data.table::fread(input_path, ...)
  as_tibble(raw_table)
}
# Load trainClean_py.csv and add the end point of each movement arrow.
df <- loadData("trainClean_py.csv")

# (X_end, Y_end) is the point at distance S from (X, Y) along the angle
# (90 - Dir) degrees, converted to radians for cos()/sin().
# NOTE(review): presumably Dir is measured clockwise from the +Y axis - confirm.
df %<>%
  mutate(
    X_end = S * cos((90 - Dir) * pi / 180) + X,
    Y_end = S * sin((90 - Dir) * pi / 180) + Y
  )

dfFeatures <- loadData("features_py.csv")

# Plot size options; this was fused onto the previous statement, which made
# the line unparseable.
options(repr.plot.width = 10, repr.plot.height = 6)

# PlayId used for the example plot (kept as a string, as in the original).
samp_play <- "20170910001102"
# Draw a single play: player positions, movement arrows, the rusher marker and
# the line of scrimmage.
#
# dfPlay:   rows of the play to draw. The code reads the columns X, Y, X_end,
#           Y_end, OnOffense, NflId, NflIdRusher, LineOfScrimmage, PlayId,
#           Yards and Down.
# features: optional features for the same play, only used when run is TRUE.
#           The code reads LineOfScrimmage, Rusher_Gap_Center,
#           Rusher_Gap_Radius, Rusher_Gap_NPlayers and Rusher_Gap_OpenSize.
# run:      when TRUE and features is non-empty, overlay the rusher gap circle
#           and extend the subtitle with the gap statistics.
#
# Returns the ggplot object; nothing is printed here.
plotPlay = function(dfPlay, features=NULL, run=FALSE) {
  # dfPlay holds several rows per play, so building labels from whole columns
  # produced one (repeated) label per row. Use the first value only.
  # NOTE(review): assumes PlayId, Yards and Down are constant within dfPlay.
  play_id <- head(pull(dfPlay, PlayId), 1)
  yards <- head(dfPlay$Yards, 1)
  down <- head(dfPlay$Down, 1)

  p <- ggplot(dfPlay, aes(X, Y, color = OnOffense)) +
    geom_point(size = 2) +
    # Arrow from the current position to the precomputed (X_end, Y_end).
    geom_segment(aes(x = X, y = Y, xend = X_end, yend = Y_end),
                 arrow = arrow(length = unit(.5, "cm"))) +
    # Mark the rusher (row whose NflId equals NflIdRusher) with a black dot.
    geom_point(data = filter(dfPlay, NflId == NflIdRusher),
               pch = 21, size = 1.5, fill = "black") +
    # geom_point(data = filter(dfPlay, Position == "QB"), size = 2, fill = "blue") +
    scale_colour_brewer(palette = "Set2") +
    scale_fill_brewer(palette = "Set2") +
    # geom_vline(aes(xintercept=0), color="grey") +
    # geom_vline(aes(xintercept=10), color="grey") +
    geom_vline(aes(xintercept = LineOfScrimmage), colour = "black", lty = 2) +
    # Fixed y range of 0..160/3 (presumably the field width in yards - confirm).
    coord_cartesian(ylim = c(0, 160 / 3), expand = FALSE) + # xlim = c(-10,110),
    labs(
      x = "Distance from offensive team's own end zone",
      y = "Y",
      title = paste0("PlayId ", play_id),
      subtitle = paste("Yards", yards, "; ", "Down", down)
    ) +
    theme_bw(14) +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major.y = element_blank())

  # Zero-row features (no match for this play) would yield an empty circle and
  # a subtitle with blank values, so fall back to the plain plot in that case.
  if (run && !is.null(features) && NROW(features) > 0) {
    dfC <- circleDF(c(features$LineOfScrimmage, features$Rusher_Gap_Center),
                    features$Rusher_Gap_Radius)
    p +
      geom_path(data = dfC, aes(x, y), color = "gold") +
      labs(subtitle = paste("Yards", yards, "; ", "Down", down, "; ",
                            "NPlayers", features$Rusher_Gap_NPlayers, "; ",
                            "OpenSize", round(features$Rusher_Gap_OpenSize, 2)))
  } else {
    p
  }
}
# Build the outline of a circle as a data frame of points.
#
# center:  numeric vector; element 1 is the x centre, element 2 the y centre.
# r:       radius.
# npoints: number of points, evenly spaced in angle from 0 to 2*pi inclusive
#          (so the first and last point coincide).
#
# Returns a data.frame with columns x and y.
circleDF = function(center = c(0,0), r = 1, npoints = 100){
  angles <- seq(0, 2 * pi, length.out = npoints)
  data.frame(
    x = center[1] + r * cos(angles),
    y = center[2] + r * sin(angles)
  )
}
# Plot the sample play together with its feature row.
dfPlay <- filter(df, PlayId == samp_play)
features <- filter(dfFeatures, PlayId == samp_play)
plotPlay(dfPlay, features, run = TRUE)

# First, categorize runs: long, medium, short, bad
# Output (presumably of summary(df$Yards)):
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# -14.000   1.000   3.000   4.212   6.000  99.000
# Bucket Yards into four categories. cut() uses right-closed intervals by
# default: (-Inf, 0] bad, (0, 3] short, (3, 6] med, (6, Inf] long.
df <- mutate(
  df,
  YardsCategory = cut(
    Yards,
    breaks = c(-Inf, 0, 3, 6, Inf),
    labels = c("bad", "short", "med", "long")
  )
)
# Row counts per category.
table(df[["YardsCategory"]])
# Output of table(df$YardsCategory):
#    bad  short    med   long
# 103532 184888 117106 104236
# PlayId values of the rows in each yardage category. df is filtered row-wise,
# so an id appears once for every matching row.
PlayId_long <- df %>%
  filter(YardsCategory == "long") %>%
  pull(PlayId)

PlayId_med <- df %>%
  filter(YardsCategory == "med") %>%
  pull(PlayId)

PlayId_short <- df %>%
  filter(YardsCategory == "short") %>%
  pull(PlayId)

PlayId_bad <- df %>%
  filter(YardsCategory == "bad") %>%
  pull(PlayId)

# Plot several runs from vector of ids
# Print the plots of up to n randomly chosen plays.
#
# ids: vector of PlayId values to sample from (duplicates are allowed).
# n:   maximum number of plays to plot.
#
# Reads the global data frames df and dfFeatures and prints one plotPlay()
# figure per sampled id. Returns NULL invisibly.
plotPlays = function(ids, n=5) {
  # ids pulled row-wise from df repeat each play once per row; deduplicate so
  # the same play is not drawn more than once.
  ids <- unique(ids)
  for (index in sample.int(length(ids), min(n, length(ids)))) {
    id <- ids[index]
    dfPlay <- filter(df, PlayId == id)
    features <- filter(dfFeatures, PlayId == id)
    print(plotPlay(dfPlay, features, run = TRUE))
  }
  invisible(NULL)
}
# Output:
# [1] "20181007032152"
# Strange play
# A tibble: 22 x 38
#    PlayId GameId X Y S A Dis Orientation Dir NflId
#    <int6> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#  1 20181… 2.02e9 18.5 43.1 1.99 2.91 0.18 -82.9 65.7 4.36e3
#  2 20181… 2.02e9 32.0 19.2 2.72 1.96 0.27 -58.0 56.8 2.50e6
#  3 20181… 2.02e9 15.8 31.5 2.86 3.76 0.27 245. -44.9 2.51e6
#  4 20181… 2.02e9 19.9 28.8 3.54 3.02 0.33 -80.5 -35.2 2.51e6
#  5 20181… 2.02e9 23.0 31.1 4.18 3.25 0.4 -84.6 -62.1 2.54e6
#  6 20181… 2.02e9 20.8 24.0 2.48 1.85 0.24 -84.1 -27.8 2.54e6
#  7 20181… 2.02e9 16.6 28.9 1.85 2.72 0.18 -86.3 -78.1 2.55e6
#  8 20181… 2.02e9 19.8 17.3 3.02 3.67 0.28 269. 201. 2.56e6
#  9 20181… 2.02e9 16.7 26.5 2.84 2.48 0.31 261. -24.5 2.56e6
# 10 20181… 2.02e9 16.5 22.0 2.24 0.41 0.22 -87.8 -31.9 2.56e6
# … with 12 more rows, and 28 more variables: Quarter <int>,
# PossessionTeam <int>, Down <int>, Distance <int>, FieldPosition <int>,
# NflIdRusher <int>, OffenseFormation <int>, OffensePersonnel <int>,
# DefendersInTheBox <dbl>, DefensePersonnel <int>, PlayDirection <chr>,
# TimeHandoff <chr>, TimeSnap <chr>, Yards <int>, PlayerHeight <int>,
# PlayerWeight <int>, Position <chr>, HomeTeamAbbr <int>,
# VisitorTeamAbbr <int>, Week <int>, StadiumType <int>, Turf <int>,
# GameWeather <int>, LineOfScrimmage <int>, OnOffense <lgl>,
# X_end <dbl>, Y_end <dbl>, YardsCategory <fct>